Initialization Code

Run this section once at the beginning of each R session.

library(dplyr)
library(Seurat)
library(patchwork)
library(RColorBrewer)
library(EnhancedVolcano)

custom_color_palette <-  c("#1B9E77", "#D95F02", "#7570B3", "#CA00C4",
                          "#66A61E", "#E6AB02", "#A6761D", "#666666", "#194D33")

LN_Trm_genes <- c("Acap1", "Actn2", "Amica1", "Arhgef1", "Atxn7l3b", "Aw112010",
                  "B4galnt1", "Bcl11b", "Cbx3", "Ccnd2", "Ccr10", "Cd27", "Cd7",
                  "Cd74", "Chd3", "Cirbp", "Clec2d", "Crot", "Csf1", "Cxcr3",
                  "Cxcr6", "Eif5", "Evl", "Fam189b", "Fubp1", "Fyb", "Gramd1a",
                  "Sema4a","Gstp1", "Shisa5", "H2-T23", "Sipa1", "Hmgn1",
                  "Slfn2", "Hmha1", "Sp100", "Hsp90b1", "Spcs2", "Id2", "Srrm2",
                  "Ifitm10", "Stap1", "Ikzf3", "Tbc1d10c", "Il16", "Tesc",
                  "Il18r1", "Tnfaip8", "Il7r", "Tnrc6a", "Irf2bpl", "Tsc22d4",
                  "Itgae", "Uba52", "Itgal", "Ucp2", "Itm2c", "Wbp1", "Lfng",
                  "Xist", "Lpar6", "Ypel3", "Lrrc58", "Zbtb7a", "Ltb", "Znrf1",
                  "Ly6a", "Ly6e", "Ly6g5b", "Malat1", "Mbnl1", "Mrpl52", "Mxd4",
                  "Mycbp2", "N4bp2l2", "N4bp2l2", "Ndufa3", "Ndufa5", "Nktr",
                  "Nudcd3", "Ogt", "Pdcd4", "Pdia3", "Pdia6", "Ptpn7", "Ptprc",
                  "Rapgef6", "Rbpj", "Rgs10", "Rpl15", "Rpl35", "Rpl38",
                  "Rps28", "Rps29", "Sash3")

TGFb_genes <- c("Mcpt2","Cdh1","Spsb1","Vipr2","Gpr56","Src","Ppp2r2c","Lrig1",
                "Itgae","Agap1","Ncmap","Pmepa1","Sema6d","Emid1","Cd33","Dlk1",
                "Ldlrad4","Car2","Cpd","Nt5e","Tspan9","Gsg2","Klhl30",
                "1810011H11Rik","Osgin1","Ccl1","Litaf","Itga1","Kifc3",
                "Hsf2bp","Asic3","Abi3","Smurf2","Phactr2","Oplah","Qpct",
                "Tfr2","Isg20","Rnase6","Rgs1","2900026A02Rik","Mmp11",
                "Tnfsf11","Nrarp","Cyb561","Smyd1","Kcnip2","Cx3cr1","Nek6",
                "Nlrp1b","St8sia1","Arhgap39","Jup","Htra3","Rgs16","H2-M5",
                "Chn2","Cish","Atp6v0a1","Skil","Dok3","Igflr1","Ccr8","Timp2",
                "Zfyve28","Ppm1n","Hpgds","B4galnt4","Ifng","Ctnnal1","Clec12a",
                "Exoc3l","Coro2a","Ikzf4","Adamts6","D8Ertd82e","Smpd5","Aqp3",
                "Evpl","Ramp1","St8sia6","Xcl1","Scn1b","Rnf149","Dtx4","Gngt2",
                "Sbk1","Tbc1d16","Tnfrsf13c","Gna12","Ermn","Neu3","Fmnl3",
                "Cd83","Epb4.1l2","Ccdc112","Adam19","Rab26","Fam101b","Mical3",
                "Prkcz","Grina","Slc27a6","Tgfbr3","Fgfr1","Msc","Rgs10",
                "Lonrf1","Lax1","Kcnc1","Nphp1","Slc16a10","Kif13a","Ninj1",
                "Smyd3","9430020K01Rik","Csgalnact1","Gpaa1","Ski","Gcnt4",
                "Map9","Egr3","Fam161a","Egr1","Fndc3a","Mapkapk3","Ctss",
                "Hnrnpll","Galm","Dusp2","Stom","Esm1","1700049G17Rik",
                "Plekho1","Med10","Smtn","Gpr34","Sepn1","Egr2","Prrt2","Aen",
                "Cd101","Gtf2ird1","Tiam1","Camkk1","D430042O09Rik","Fam214b",
                "Matk","Ralgps1","Dapk2","Usp6nl","Foxred2","Wdyhv1","Znrf1",
                "Tjp1","Irf8","Hemk1","Pgap1","Accs","Aim2","Per3","Zfr2",
                "Lgalsl","1700001L05Rik","Zfp820","D3Ertd254e","Gcnt1",
                "Slc41a2","Ttc39b","Gclm","Peg13","Slc9a1","Adora3","Cers6",
                "Ccrn4l","Cd96","Golim4","Lpcat2","Lsr","Acsbg1","Eef2k",
                "Plekhf1","Rbm20","Ssx2ip","Ankrd50","Igfbp4","Inpp4b",
                "Irf2bpl","Pygl","Zfp1","Golm1","Gpr68","Ptgfrn","Tsc22d1",
                "Abca1","Fam124b","Itpripl2","Bcl6","Lysmd2","Trp53inp2",
                "Zdhhc13","Bpgm","F2r","Frmd4b","Ctsw","Swap70","Frmd6","Gas7",
                "Gdpd5","Spire1","Tet3","Batf3","Dstyk","Luzp1","Mgat5","Ptpre",
                "Ralgps2","Mif4gd","Stat1","Ttc3","Abhd15","Cerk","Adssl1",
                "Pcyt1b","Rai1","Blcap","Map3k14","Rnf19b","Scai","Tmem57",
                "Atp6v1g2","Chst12","Fam20a","Gtf3c1","Trp53inp1","Wdr78",
                "Aars2","Cd244","Ly6g5b","Tbx6","Usp22","Zfp827",
                "1600014C10Rik","Als2","Arhgef5","B4galt5","Nfat5","Prkacb",
                "Rgs2","Slc9a3r1","Soat1","Tctn3","Ttc39c","Cotl1","Ldlrap1",
                "Ncf1","Iigp1","Ikzf3","Ipcef1","Irf4","Abi2","Runx3","Ypel3",
                "Entpd1","Fut8","Inpp5f","Apol7e","Arhgef12","Nrp1","Slc26a11",
                "Tnfrsf1b","Cd160","Gfod1","Gm12185","H6pd","Pmm1","Tmem2",
                "Ublcp1","Dennd3","Gramd1a","Idh2","Ppip5k1","Slc39a13",
                "Baiap3","Extl3","Mxd1","Nipal1","Rrp1b","Twsg1","Cdc42bpg",
                "Celsr1","Ehd1","Kit","Slc22a15","Tmcc1","Camsap2","Klhl25",
                "Ncf4","Plcxd2","Rab11fip4","Specc1","Fam3c","Fuca2","Pde4a",
                "Prr12","Ctnnb1","Egln3","Fam46a","Fbxo25","Gprin3","Scly",
                "AW112010","Cd1d1","Lrrc61","Clstn1","Exosc4","Smad7","Susd3",
                "Traf4","Vasp","Gne","Gpbp1l1","Prkch","Rab37","Rbpj","Usp11",
                "AA467197","Bmpr2","Cd8a","Dpp9","Inpp5d","Kif1b","Lasp1",
                "Rftn1","Wee1","Fasl","Nbas","Plscr1","Prkdc","Rhoh","Spcs2",
                "Suox","Tbc1d4","Tgif1","Anp32a","Lnpep","Myo5a","Rreb1",
                "Zfp706","Ermp1","Fam149b","Glrx","Pacsin2","Plekha2","Sorl1",
                "Dnajc9","Nbeal1","Plod2","Ssh2","Trappc10","Ercc6","Fchsd2",
                "Gfi1","Ubn2","Vps54","Actr1b","Ccni","Cd2bp2","Tnfsf10",
                "Acot11","Atad2","Lgals9","Nup153","Gtpbp1")

Initial QC, Pre-Processing, and Run

Create the Seurat Object.

scobj.data <- Read10X(data.dir="LN/filtered/")
scobj <- CreateSeuratObject(counts=scobj.data, project="2022-12-29_LN", 
                         min.cells=3, min.features=200)

QC & Filtering

Calculate the % mitochondrial and ribosomal genes for each cell.

scobj[["percent.mt"]] <- PercentageFeatureSet(scobj, pattern="^mt-")
scobj[["percent.rps"]] <- PercentageFeatureSet(scobj, pattern="Rps")
scobj[["percent.rpl"]] <- PercentageFeatureSet(scobj, pattern="Rpl")

Visualize the QC features as violin plots.

VlnPlot(scobj, features=c("nFeature_RNA", "nCount_RNA"), ncol=2)

VlnPlot(scobj, features=c("percent.mt", "percent.rps", "percent.rpl"), ncol=3)

Visualize the QC features as FeatureScatter plots.

plot1 <- FeatureScatter(scobj, feature1="nCount_RNA", feature2="percent.mt")
plot2 <- FeatureScatter(scobj, feature1="nCount_RNA", feature2="nFeature_RNA")
plot1 + plot2

Apply filters and normalize data.

scobj <- subset(scobj, subset=(nFeature_RNA > 100 & nFeature_RNA < 6000 
                & percent.mt < 5 & percent.rps < 20 & percent.rpl < 23))
scobj <- NormalizeData(scobj, normalization.method="LogNormalize",
                       scale.factor=10000)

Identify Highly Variable Features

scobj <- FindVariableFeatures(scobj, selection.method="vst", nfeatures=2500)

Scale Data

all.genes <- rownames(scobj)
scobj <- ScaleData(scobj, features=all.genes)
## Centering and scaling data matrix

Linear Dimensional Reduction & Determine Dimensionality

Perform PCA.

scobj <- RunPCA(scobj, features=VariableFeatures(object=scobj))
## PC_ 1 
## Positive:  Pclaf, Birc5, Stmn1, Mki67, Cdca8, Nusap1, Hist1h1b, Tpx2, Top2a, Ccna2 
##     Knl1, Spc24, Hmmr, Rrm2, Kif11, Esco2, Cks1b, Neil3, Bub1, Cdk1 
##     Cdca3, Depdc1a, Hist1h2af, Ube2c, Aurkb, Ccnb2, Hist1h3c, Asf1b, Ckap2l, Mxd3 
## Negative:  Arhgap15, Ccnd3, Foxp1, Themis, Dock2, Lncpint, Gm2682, Maml2, Zbtb20, Gm26740 
##     Stat4, Ripor2, Dock10, Slc9a9, Elmo1, Skap1, Smyd3, Ikzf1, Pitpnc1, Inpp4b 
##     Tcf12, Dennd4a, Fgf13, Tnik, Nedd9, Zswim6, Cblb, Thada, Atxn1, Zeb1 
## PC_ 2 
## Positive:  Dock10, Arhgap15, Dock2, Slc9a9, Themis, Skap1, Elmo1, Gm2682, Maml2, Atp8b4 
##     Ikzf1, Atxn1, Pitpnc1, Stag1, Foxp1, Tcf12, Lncpint, Ccnd3, Fam172a, Exoc4 
##     Iqgap2, Zbtb20, Rabgap1l, Lrba, Pag1, Cblb, Slco3a1, Zeb1, Mctp2, Smyd3 
## Negative:  Rplp0, Rpl41, Rps20, Rps2, Rps12, Rpl28, Actb, Pfn1, Ppia, Cd52 
##     Nme2, Ccl5, S100a6, Sh3bgrl3, Actg1, S100a10, Hsp90ab1, Gapdh, Rbm3, Crip1 
##     Eif5a, Emp3, Rps17, Ly6a, Lgals1, Ptma, Vim, Clic1, Prdx1, Ndufa4 
## PC_ 3 
## Positive:  Igkc, Cd79a, Ebf1, Mef2c, Ighd, Bank1, Iglc2, H2-DMb2, Cd74, H2-Ab1 
##     H2-Eb1, Pax5, Fcmr, H2-Aa, Ms4a1, Ly6d, Iglc3, Blnk, March1, Syk 
##     Gm31243, Cd79b, Ly86, Fcer2a, Ciita, Wdfy4, Bcl11a, Siglecg, Napsa, Blk 
## Negative:  Gm2682, Skap1, Themis, Atp8b4, Epsti1, Slc9a9, Atxn1, Iqgap2, Runx2, Ahnak 
##     Slco3a1, Pitpnc1, Gm44174, Ccl5, S100a6, Fgf13, Itgb1, Cd226, Ust, Ctla2a 
##     Camk4, Gramd1b, Maml2, Ccr2, Gab3, Sntb1, Mllt3, Dock10, Sidt1, Stat4 
## PC_ 4 
## Positive:  Csf1, Chn2, Gzmb, Gm36723, Osbpl3, Itga1, Atp8a2, AU020206, Hip1, Vav3 
##     Gstm5, Rbpj, Cish, 2900026A02Rik, Itgae, Tjp1, Rbms3, Inpp4b, Coro2a, Fgl2 
##     Mmp16, Mdfic, Pip5k1b, Gm42418, Arsb, Filip1, Rgs1, Pde11a, Cd226, St6galnac3 
## Negative:  Sidt1, Aff3, Sell, Itgb1, Klf3, Nme2, Rplp0, 1700025G04Rik, Rps2, Bach2 
##     Arhgef18, Rps12, Pde2a, Rps20, Cmah, Actn1, H2afz, Hsp90ab1, Rpl41, Atp1a1 
##     Rasa3, Ndufa4, Crip1, Nsg2, Elmo1, Tagln2, Lef1, Itga4, Nrp1, Txk 
## PC_ 5 
## Positive:  S100a6, Lgals1, S100a4, S100a10, Sh3bgrl3, Cd52, Crip1, Vim, Lgals3, Capg 
##     Ly6a, Ahnak, Ifitm2, Ifitm1, Anxa1, Ccr2, Actb, Emp3, Pfn1, Ifitm3 
##     Gapdh, Ppia, Clic1, Cd48, Rpl41, Ctla2a, AA467197, Gzmb, Reep5, Rps2 
## Negative:  Apoe, Gm42418, Ifngas1, Lyz2, Dapl1, Nusap1, Tox, Btbd11, Mxd3, Hist1h2af 
##     Kif14, Aif1, Esco2, C1qa, Anln, Knl1, Mis18bp1, Kif11, Hist1h2ab, Cenpf 
##     Taco1, Hist1h3c, Klf3, Hist1h1b, Prc1, Bub1b, C1qb, Hist1h2bb, Ccnf, Birc5

Jackstraw and Elbow Plots to visualize dimensionality of dataset.

scobj <- JackStraw(scobj, dims=30, num.replicate=100)
scobj <- ScoreJackStraw(scobj, dims=1:30)
JackStrawPlot(scobj, dims=1:30)
## Warning: Removed 56380 rows containing missing values (`geom_point()`).

ElbowPlot(scobj, ndims=30)

Cluster Cells

Run clustering and UMAP algorithms. Dims based on Jackstraw and Elbow plots.

scobj <- FindNeighbors(scobj, dims=1:15) 
## Computing nearest neighbor graph
## Computing SNN
scobj <- FindClusters(scobj, resolution=0.3)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 4102
## Number of edges: 141924
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8766
## Number of communities: 9
## Elapsed time: 0 seconds
scobj <- RunUMAP(scobj, dims=1:15)
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
## 13:08:06 UMAP embedding parameters a = 0.9922 b = 1.112
## 13:08:06 Read 4102 rows and found 15 numeric columns
## 13:08:06 Using Annoy for neighbor search, n_neighbors = 30
## 13:08:06 Building Annoy index with metric = cosine, n_trees = 50
## 0%   10   20   30   40   50   60   70   80   90   100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 13:08:06 Writing NN index file to temp file /tmp/RtmpnVd11M/file31d894ca6680
## 13:08:06 Searching Annoy index using 1 thread, search_k = 3000
## 13:08:08 Annoy recall = 100%
## 13:08:08 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 13:08:09 Initializing from normalized Laplacian + noise (using irlba)
## 13:08:09 Commencing optimization for 500 epochs, with 169224 positive edges
## 13:08:27 Optimization finished

Show the UMAP and the number of cells in each cluster.

DimPlot(scobj, reduction="umap", pt.size=2.5, cols=custom_color_palette)

table(Idents(scobj))
## 
##    0    1    2    3    4    5    6    7    8 
## 1567  934  596  379  182  168  126   78   72

Generate Initial Heatmap

Find markers for all clusters.

scobj.markers <- FindAllMarkers(scobj, only.pos=TRUE, min.pct=0.25,
                                logfc.threshold=0.25)
## Calculating cluster 0
## Calculating cluster 1
## Calculating cluster 2
## Calculating cluster 3
## Calculating cluster 4
## Calculating cluster 5
## Calculating cluster 6
## Calculating cluster 7
## Calculating cluster 8

Generate heatmap for top 10 genes in all clusters.

scobj.markers %>%
    group_by(cluster) %>%
    top_n(n=10, wt=avg_log2FC) -> top10
DoHeatmap(scobj, features=top10$gene, 
          group.colors=custom_color_palette) + NoLegend()

Remove Non-CD8+ T Cells –> Subset 1

sub1_scobj <- subset(x=scobj, idents=c(4, 7, 8), invert=TRUE)

Re-Run Analysis on Subset 1 (Sub1)

Determine highly variables genes.

sub1_scobj <- FindVariableFeatures(sub1_scobj, selection.method="vst", 
                                   nfeatures=2500)

Rescale data.

sub1_all.genes <- rownames(sub1_scobj)
sub1_scobj <- ScaleData(sub1_scobj, features=sub1_all.genes)
## Centering and scaling data matrix

Re-run PCA.

sub1_scobj <- RunPCA(sub1_scobj, features=VariableFeatures(object=sub1_scobj))
## PC_ 1 
## Positive:  Pclaf, Birc5, Mki67, Nusap1, Stmn1, Hist1h1b, Top2a, Kif11, Tpx2, Cdca8 
##     Knl1, Neil3, Esco2, Hmmr, Ccna2, Spc24, Rrm2, Bub1, Hist1h2af, Cdk1 
##     Depdc1a, Cdca3, Aurkb, Shcbp1, Kif4, Hist1h3c, Cks1b, Ccnb2, Ube2c, Bub1b 
## Negative:  Ccl5, Ccnd3, Arhgap15, Foxp1, Gm26740, Bcl2, Stat4, Sidt1, Lef1, Gm2682 
##     Ripor2, Prkcq, Gm42418, Lncpint, Scml4, Peli1, Sell, Camk1d, Zbtb20, Pde7a 
##     Themis, mt-Co1, Utrn, Junb, Satb1, Bach2, Dock2, Thada, Mbnl1, Maml2 
## PC_ 2 
## Positive:  Rpsa, Rplp0, Rpl41, Rps2, Tmsb4x, Ppia, Rpl28, Rps20, Pfn1, Actb 
##     Actg1, Nme2, Cd52, Sh3bgrl3, S100a10, Rps12, Gapdh, Rbm3, Hspa8, Emp3 
##     Crip1, S100a6, Eif5a, Vim, Lgals1, Prdx1, Clic1, Ptma, H2afz, Ly6a 
## Negative:  Arhgap15, Themis, Dock2, Dock10, Slc9a9, Gm2682, Mbnl1, Skap1, Utrn, Ankrd44 
##     Atp8b4, Grap2, Maml2, Elmo1, Atxn1, Ikzf1, Pitpnc1, Macf1, Prkcq, Foxp1 
##     Arhgap26, Tcf12, Ccnd3, Zbtb20, Ppm1h, Lncpint, Aak1, Vps13b, Pde7a, Stag1 
## PC_ 3 
## Positive:  Csf1, Gzmb, Chn2, Itga1, Gm36723, AU020206, Cish, Osbpl3, Rbpj, Hip1 
##     Gstm5, S100a4, Cd52, Atp8a2, Fgl2, Sh3bgrl3, Ly6a, Vav3, Itgae, 2900026A02Rik 
##     Inpp4b, Tjp1, Mdfic, Havcr2, Rbms3, Cd226, Coro2a, AA467197, Capg, Pik3ap1 
## Negative:  Sidt1, Aff3, Sell, Rplp0, Klf3, Rps12, Rps2, Rpsa, 1700025G04Rik, Bach2 
##     Itgb1, Rps20, Pde2a, Nme2, Arhgef18, Ccr7, Cmah, Nsg2, Lef1, Rpl41 
##     Actn1, Elmo1, Itga4, Txk, Arhgap15, Slamf6, Fgf13, Il6ra, H2afz, Ndufa4 
## PC_ 4 
## Positive:  Lgals3, S100a6, Lgals1, Vim, Ifitm1, Ifitm2, Capg, Crip1, Ahnak, S100a10 
##     S100a4, Rap1gap2, Dgkh, Itgb1, Itsn1, Runx2, Anxa1, Nebl, Cd48, Ccr2 
##     Iqgap2, Nrp1, Ifitm3, Reep5, Emp3, Klf12, Fry, Swap70, Rasa3, Ripor2 
## Negative:  Plac8, Ifi27l2a, Xcl1, Gimap7, Ifngas1, Ighm, Btbd11, Tox, Eomes, Dapl1 
##     Samd3, Isg15, A330040F15Rik, Ifit1, Prkch, Usp18, Ddx60, Chn2, Setbp1, Abtb2 
##     Rps20, Rgs1, Oas1a, Rnf213, Cpq, Ifit3, Rtp4, Atp8a2, Ifit3b, Gzmk 
## PC_ 5 
## Positive:  Ube2c, Cenpf, Ccnb1, Aspm, Ccnb2, Kif2c, Prr11, Hmmr, Pif1, Cdkn3 
##     Pimreg, Cenpa, Ckap2, Ccna2, Cdc20, Cdca3, Cenpe, Arhgap19, Birc5, Gas2l3 
##     Kif14, Plk1, Mxd3, Cep55, Nusap1, Depdc1a, Nuf2, Kif23, Cdc25c, Aurka 
## Negative:  Dtl, E2f1, Cdca7, Lig1, Chaf1a, Hells, Cdc6, Mcm5, Uhrf1, Chek1 
##     Rad51b, Mcm3, Mcm6, Mcm2, Mcm4, Timeless, Mcm7, Pole, Atad5, Pola1 
##     Brca1, Dhfr, Clspn, Mms22l, Cdc45, Hsp90ab1, Rad51, Ranbp1, Cenps, Fignl1

Re-run Jackstraw and Elbow plots to determine new dimensionality.

sub1_scobj <- JackStraw(sub1_scobj, dims=30, num.replicate=100)
sub1_scobj <- ScoreJackStraw(sub1_scobj, dims=1:30)
JackStrawPlot(sub1_scobj, dims=1:30)
## Warning: Removed 57280 rows containing missing values (`geom_point()`).

ElbowPlot(sub1_scobj, ndims=30)

Rerun clustering and UMAP algorithms. Dims based on Jackstraw and Elbow plots.

sub1_scobj <- FindNeighbors(sub1_scobj, dims=1:25)
## Computing nearest neighbor graph
## Computing SNN
sub1_scobj <- FindClusters(sub1_scobj, resolution=0.3)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 3770
## Number of edges: 146241
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8493
## Number of communities: 6
## Elapsed time: 0 seconds
sub1_scobj <- RunUMAP(sub1_scobj, dims=1:25)
## 13:13:08 UMAP embedding parameters a = 0.9922 b = 1.112
## 13:13:08 Read 3770 rows and found 25 numeric columns
## 13:13:08 Using Annoy for neighbor search, n_neighbors = 30
## 13:13:08 Building Annoy index with metric = cosine, n_trees = 50
## 0%   10   20   30   40   50   60   70   80   90   100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 13:13:09 Writing NN index file to temp file /tmp/RtmpnVd11M/file31d897dc7231
## 13:13:09 Searching Annoy index using 1 thread, search_k = 3000
## 13:13:10 Annoy recall = 100%
## 13:13:11 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 13:13:12 Initializing from normalized Laplacian + noise (using irlba)
## 13:13:12 Commencing optimization for 500 epochs, with 166974 positive edges
## 13:13:29 Optimization finished

Show the UMAP and the number of cells in each cluster.

DimPlot(sub1_scobj, reduction="umap", pt.size=2.5, cols=custom_color_palette)

table(Idents(sub1_scobj))
## 
##    0    1    2    3    4    5 
## 1675  868  596  347  180  104

Find markers for all clusters.

sub1_scobj.markers <- FindAllMarkers(sub1_scobj, only.pos=TRUE, min.pct=0.25,
                                     logfc.threshold=0.25)
## Calculating cluster 0
## Calculating cluster 1
## Calculating cluster 2
## Calculating cluster 3
## Calculating cluster 4
## Calculating cluster 5

Generate heatmap for top 10 genes in all clusters.

sub1_scobj.markers %>%
  group_by(cluster) %>%
  top_n(n=10, wt=avg_log2FC) -> sub1_top10
DoHeatmap(sub1_scobj, features=sub1_top10$gene, 
          group.colors=custom_color_palette) + NoLegend()

Perform Cell Cycle Regression

Cluster 4 has two top genes (Top2a, Mki67) implicated in cell cycle. This code attempts to regress these signals from the dataset. The result was not used for further analysis, but additional B cells are discovered in the process, which need to be removed from Subset 1.

Set up a copy of sub1_scobj and generate cell cycle gene lists in proper format.

scobj_cc <- sub1_scobj

s.genes <- tolower(cc.genes$s.genes)
for(i in 1:length(s.genes)){
  substr(s.genes[i],1,1) <- toupper(substr(s.genes[i], 1, 1))
}

g2m.genes <- tolower(cc.genes$g2m.genes)
for(i in 1:length(g2m.genes)){
  substr(g2m.genes[i],1,1) <- toupper(substr(g2m.genes[i], 1, 1))
}

Perform the cell cycle scoring and regression.

scobj_cc <- CellCycleScoring(scobj_cc, s.features=s.genes, 
                               g2m.features=g2m.genes, set.ident=FALSE)
## Warning: The following features are not present in the object: Mlf1ip, not
## searching for symbol synonyms
## Warning: The following features are not present in the object: Fam64a, Hn1, not
## searching for symbol synonyms
scobj_cc <- ScaleData(scobj_cc, vars.to.regress=c("S.Score", "G2M.Score"), 
                      features=rownames(scobj_cc))
## Regressing out S.Score, G2M.Score
## Centering and scaling data matrix

Rerun all analysis from Subset 1 on the regressed dataset.

scobj_cc <- RunPCA(scobj_cc, features=VariableFeatures(object=scobj_cc))
## PC_ 1 
## Positive:  Arhgap15, Themis, Dock2, Dock10, Slc9a9, Gm2682, Mbnl1, Skap1, Utrn, Ankrd44 
##     Atp8b4, Grap2, Maml2, Elmo1, Atxn1, Ikzf1, Pitpnc1, Macf1, Prkcq, Foxp1 
##     Arhgap26, Tcf12, Ccnd3, Zbtb20, Lncpint, Ppm1h, Aak1, Vps13b, Stag1, Pde7a 
## Negative:  Rpsa, Rplp0, Rpl41, Rps2, Ppia, Tmsb4x, Rpl28, Rps20, Pfn1, Actb 
##     Nme2, Actg1, Cd52, Rps12, Sh3bgrl3, S100a10, Gapdh, Rbm3, Hspa8, Emp3 
##     Crip1, S100a6, Eif5a, Vim, Lgals1, Ptma, Prdx1, H2afz, Clic1, Hsp90ab1 
## PC_ 2 
## Positive:  Sidt1, Sell, Aff3, Rplp0, Klf3, Rps12, Bach2, Rpsa, Rps2, 1700025G04Rik 
##     Pde2a, Rps20, Cmah, Itgb1, Arhgef18, Lef1, Ccr7, Arhgap15, Nsg2, Itga4 
##     Gm26740, Txk, Elmo1, Nme2, Actn1, Slamf6, Fgf13, Foxp1, Il6ra, Rpl41 
## Negative:  Csf1, Gzmb, Chn2, S100a4, Gm36723, Cd52, Itga1, Cish, Rbpj, Osbpl3 
##     Hip1, AU020206, Sh3bgrl3, Gstm5, Fgl2, Ly6a, Itgae, Tjp1, Atp8a2, 2900026A02Rik 
##     Vav3, Capg, Inpp4b, AA467197, Mdfic, Havcr2, Coro2a, Rbms3, Ccr2, Cd226 
## PC_ 3 
## Positive:  Plac8, Gimap7, Xcl1, Ifi27l2a, Ighm, Ifngas1, Btbd11, Tox, Eomes, Isg15 
##     Samd3, Dapl1, Chn2, A330040F15Rik, Ifit1, Rgs1, Prkch, Ddx60, Usp18, Atp8a2 
##     Setbp1, Oas1a, Rnf213, Ifit3, Abtb2, Rps20, Rtp4, Vav3, Cpq, St6galnac3 
## Negative:  Lgals3, Lgals1, S100a6, Vim, Ifitm1, Crip1, Ifitm2, Capg, S100a10, Ahnak 
##     Rap1gap2, S100a4, Itgb1, Dgkh, Cd48, Runx2, Nrp1, Iqgap2, Nebl, Itsn1 
##     Anxa1, Ccr2, Reep5, Emp3, Ifitm3, Ripor2, Rasa3, Klf12, Fry, Swap70 
## PC_ 4 
## Positive:  Hmgb2, Chn2, Smc4, Cenpa, Esm1, Mcm6, Cbx5, Gzmk, Mcm2, Atp8a2 
##     Pde7a, Anp32e, Xcl1, Plac8, 1500009L16Rik, AU020206, Ifi27l2a, Csf1, Ckap5, Pip5k1b 
##     Cdca7, Gzmb, E2f1, Pde3b, Cdc20, Vav3, Isg15, Il2ra, Rgs1, Mndal 
## Negative:  Hist1h2af, Hist1h3c, Esco2, Hist1h1b, Pbk, Hist1h2bb, Neil3, Hist1h2ab, Hist1h2ae, Nusap1 
##     Aurkb, Hist1h2an, Mxd3, Hist1h2ap, Hist1h3b, Kif14, Hist1h2bm, Kif11, BC030867, Knl1 
##     Ncaph, Anln, Pclaf, Hist1h1d, Hist1h2bj, Hist1h3i, Mis18bp1, Melk, Ccnf, Hist1h2ak 
## PC_ 5 
## Positive:  Isg15, Ifit1, Ifit3, Rsad2, Ifit3b, Rtp4, Usp18, Slfn5, Cmpk2, Rnf213 
##     Zbp1, Irf7, Trim30a, Oasl2, Xaf1, A330040F15Rik, Parp9, Phf11c, Stat1, Herc6 
##     Eif2ak2, Ifi206, Ifih1, Gbp6, A930037H05Rik, Bst2, Isg20, Ifi209, Trim30c, Oas3 
## Negative:  Inpp4b, Chn2, Prkch, Dusp2, Pip5k1b, Coro2a, Csf1, Vav3, Atp8a2, Tox 
##     Ttc3, St6galnac3, 2900026A02Rik, Rgs1, Adam19, Pde3b, Pde7a, Xcl1, Cish, Smyd3 
##     AU020206, Ssbp2, Znrf1, Zbtb20, 4933406I18Rik, Mmp16, Itga1, Btbd11, Itgae, Arsb
scobj_cc <- JackStraw(scobj_cc, dims=30, num.replicate=100)
scobj_cc <- ScoreJackStraw(scobj_cc, dims=1:30)
JackStrawPlot(scobj_cc, dims=1:30)
## Warning: Removed 56178 rows containing missing values (`geom_point()`).

ElbowPlot(scobj_cc, ndims=30)

scobj_cc <- FindNeighbors(scobj_cc, dims=1:25)
## Computing nearest neighbor graph
## Computing SNN
scobj_cc <- FindClusters(scobj_cc, resolution=0.3)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 3770
## Number of edges: 147678
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8436
## Number of communities: 8
## Elapsed time: 0 seconds
scobj_cc <- RunUMAP(scobj_cc, dims=1:25)
## 13:52:19 UMAP embedding parameters a = 0.9922 b = 1.112
## 13:52:19 Read 3770 rows and found 25 numeric columns
## 13:52:19 Using Annoy for neighbor search, n_neighbors = 30
## 13:52:19 Building Annoy index with metric = cosine, n_trees = 50
## 0%   10   20   30   40   50   60   70   80   90   100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 13:52:20 Writing NN index file to temp file /tmp/RtmpnVd11M/file31d89647aed1e
## 13:52:20 Searching Annoy index using 1 thread, search_k = 3000
## 13:52:21 Annoy recall = 100%
## 13:52:22 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 13:52:22 Initializing from normalized Laplacian + noise (using irlba)
## 13:52:23 Commencing optimization for 500 epochs, with 166362 positive edges
## 13:52:40 Optimization finished
DimPlot(scobj_cc, reduction="umap", pt.size=2.5, cols=custom_color_palette)

table(Idents(scobj_cc))
## 
##    0    1    2    3    4    5    6    7 
## 1365 1286  583  361  111   27   20   17
scobj_cc.markers <- FindAllMarkers(scobj_cc, only.pos=TRUE, min.pct=0.25,
                                     logfc.threshold=0.25)
## Calculating cluster 0
## Calculating cluster 1
## Calculating cluster 2
## Calculating cluster 3
## Calculating cluster 4
## Calculating cluster 5
## Calculating cluster 6
## Calculating cluster 7
scobj_cc.markers %>%
  group_by(cluster) %>%
  top_n(n=10, wt=avg_log2FC) -> scobj_cc_top10
DoHeatmap(scobj_cc, features=scobj_cc_top10$gene, 
          group.colors=custom_color_palette) + NoLegend()

Cluster 7 contains B cells. Write the barcodes of cells in the cluster so they can be removed from the non-regressed dataset.

write.csv(CellsByIdentities(object=scobj_cc, idents=7), "B_cell_barcodes.csv")

Remove B-Cells by Barcode

Remove the cells from Subset 1 object for simpler re-analysis.

barcodes_to_remove_df = read.csv("B_cell_barcodes.csv")
barcodes_to_remove_df$X <- NULL

barcodes_to_remove = character(length(barcodes_to_remove_df$X7))
for(i in 1:length(barcodes_to_remove_df$X7)){
  barcodes_to_remove[i] = as.character(barcodes_to_remove_df$X7[i])
}

sub1_scobj <- sub1_scobj[, !(sub1_scobj@assays$RNA@data@Dimnames[[2]] 
                             %in% barcodes_to_remove)]

Re-Run Subset 1 Analysis

This section is a copy of the code from above.

Determine highly variables genes.

sub1_scobj <- FindVariableFeatures(sub1_scobj, selection.method="vst", 
                                   nfeatures=2500)

Rescale data.

sub1_all.genes <- rownames(sub1_scobj)
sub1_scobj <- ScaleData(sub1_scobj, features=sub1_all.genes)
## Centering and scaling data matrix

Re-run PCA.

sub1_scobj <- RunPCA(sub1_scobj, features=VariableFeatures(object=sub1_scobj))
## PC_ 1 
## Positive:  Pclaf, Birc5, Mki67, Nusap1, Stmn1, Hist1h1b, Kif11, Top2a, Tpx2, Cdca8 
##     Knl1, Neil3, Esco2, Ccna2, Hmmr, Spc24, Rrm2, Hist1h2af, Bub1, Cdk1 
##     Depdc1a, Aurkb, Cdca3, Hist1h3c, Shcbp1, Kif4, Ccnb2, Cks1b, Ube2c, Mxd3 
## Negative:  Ccl5, Ccnd3, Foxp1, Arhgap15, Gm26740, Bcl2, Lef1, Stat4, Sidt1, Gm42418 
##     Sell, Scml4, Ripor2, Prkcq, Peli1, Lncpint, Junb, Gm2682, Zbtb20, Pde7a 
##     Bach2, Thada, Satb1, Klf3, Themis, Utrn, Zswim6, Gab3, Sntb1, Mbnl1 
## PC_ 2 
## Positive:  Arhgap15, Themis, Dock2, Dock10, Slc9a9, Gm2682, Mbnl1, Skap1, Utrn, Maml2 
##     Ankrd44, Atp8b4, Grap2, Elmo1, Atxn1, Ikzf1, Pitpnc1, Macf1, Prkcq, Foxp1 
##     Arhgap26, Tcf12, Ccnd3, Zbtb20, Lncpint, Ppm1h, Vps13b, Aak1, Pde7a, Zeb1 
## Negative:  Rpsa, Rplp0, Rpl41, Tmsb10, Tmsb4x, Rps2, Ppia, Rpl28, Rps20, Pfn1 
##     Actg1, Actb, Cd52, Nme2, Sh3bgrl3, S100a10, Rps12, Gapdh, Emp3, Hspa8 
##     Rbm3, Crip1, S100a6, Eif5a, Vim, Lgals1, Ptma, Prdx1, Clic1, H2afz 
## PC_ 3 
## Positive:  Csf1, Gzmb, Id2, Chn2, Itga1, Gm36723, AU020206, Cish, Rbpj, S100a4 
##     Osbpl3, Hip1, Gstm5, Cd52, Sh3bgrl3, Fgl2, Atp8a2, Ly6a, Itgae, Vav3 
##     2900026A02Rik, Inpp4b, Tjp1, Mdfic, Havcr2, Rbms3, Cd226, AA467197, Coro2a, Capg 
## Negative:  Sidt1, Aff3, Sell, Rplp0, Klf3, Rps12, Rps2, Rpsa, 1700025G04Rik, Bach2 
##     Rps20, Itgb1, Pde2a, Nme2, Arhgef18, Ccr7, Cmah, Nsg2, Lef1, Rpl41 
##     Elmo1, Actn1, Itga4, Txk, Arhgap15, Slamf6, Il6ra, Fgf13, Ndufa4, H2afz 
## PC_ 4 
## Positive:  Lgals3, Lgals1, S100a6, Vim, Ifitm1, Ifitm2, Capg, Crip1, Ahnak, S100a10 
##     S100a4, Rap1gap2, Dgkh, Itgb1, Itsn1, Runx2, Nebl, Anxa1, Iqgap2, Cd48 
##     Nrp1, Ccr2, Ifitm3, Reep5, Emp3, Klf12, Fry, Ripor2, Rasa3, Swap70 
## Negative:  Plac8, Ifi27l2a, Xcl1, Gimap7, Ifngas1, Ighm, Btbd11, Tox, Eomes, Dapl1 
##     Isg15, Samd3, A330040F15Rik, Ifit1, Prkch, Chn2, Usp18, Ddx60, Rgs1, Setbp1 
##     Abtb2, Rps20, Oas1a, Rnf213, Ifit3, Atp8a2, Cpq, Rtp4, Ifit3b, Gzmk 
## PC_ 5 
## Positive:  Dtl, E2f1, Cdca7, Lig1, Cdc6, Chaf1a, Hells, Mcm5, Chek1, Uhrf1 
##     Rad51b, Mcm3, Mcm6, Mcm2, Timeless, Mcm4, Mcm7, Pole, Atad5, Pola1 
##     Brca1, Dhfr, Clspn, Cdc45, Mms22l, Rad51, Hsp90ab1, Cenps, Ranbp1, Fignl1 
## Negative:  Ube2c, Cenpf, Ccnb1, Aspm, Ccnb2, Kif2c, Hmmr, Prr11, Cdkn3, Pif1 
##     Pimreg, Cenpa, Ckap2, Ccna2, Cdc20, Birc5, Cdca3, Arhgap19, Gas2l3, Cenpe 
##     Mxd3, Kif14, Plk1, Nusap1, Cep55, Depdc1a, Nuf2, Cdc25c, Kif23, Aurka

Re-run Jackstraw and Elbow plots to determine new dimensionality.

sub1_scobj <- JackStraw(sub1_scobj, dims=30, num.replicate=100)
sub1_scobj <- ScoreJackStraw(sub1_scobj, dims=1:30)
JackStrawPlot(sub1_scobj, dims=1:30)
## Warning: Removed 57324 rows containing missing values (`geom_point()`).

ElbowPlot(sub1_scobj, ndims=30)

Rerun clustering and UMAP algorithms. Dims based on Jackstraw and Elbow plots.

sub1_scobj <- FindNeighbors(sub1_scobj, dims=1:25)
## Computing nearest neighbor graph
## Computing SNN
sub1_scobj <- FindClusters(sub1_scobj, resolution=0.3)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 3753
## Number of edges: 147152
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8523
## Number of communities: 6
## Elapsed time: 0 seconds
sub1_scobj <- RunUMAP(sub1_scobj, dims=1:25)
## 13:56:53 UMAP embedding parameters a = 0.9922 b = 1.112
## 13:56:53 Read 3753 rows and found 25 numeric columns
## 13:56:53 Using Annoy for neighbor search, n_neighbors = 30
## 13:56:53 Building Annoy index with metric = cosine, n_trees = 50
## 0%   10   20   30   40   50   60   70   80   90   100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 13:56:54 Writing NN index file to temp file /tmp/RtmpnVd11M/file31d891d9391ae
## 13:56:54 Searching Annoy index using 1 thread, search_k = 3000
## 13:56:55 Annoy recall = 100%
## 13:56:56 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 13:56:56 Initializing from normalized Laplacian + noise (using irlba)
## 13:56:57 Commencing optimization for 500 epochs, with 166828 positive edges
## 13:57:14 Optimization finished

Make Final Plots

Make copy of object for readability.

proc_scobj <- sub1_scobj

Make an object that combines the Other T clusters (0, 1, 3, 5) into one cluster (6), excluding the cluster of cycling cells (4).

comb_scobj <- proc_scobj
comb_scobj_ids <- c("6", "6", "2", "6", "4", "6")
names(comb_scobj_ids) <- levels(comb_scobj)
comb_scobj <- RenameIdents(comb_scobj, comb_scobj_ids)

UMAP and number of cells per cluster.

DimPlot(proc_scobj, reduction="umap", pt.size=2.5, cols=custom_color_palette)

table(Idents(proc_scobj))
## 
##    0    1    2    3    4    5 
## 1291 1243  589  358  170  102

LN Trm module score.

proc_scobj <- AddModuleScore(object=proc_scobj, features=list(LN_Trm_genes),
                             ctrl=100, name="LN_Trm")
## Warning: The following features are not present in the object: Amica1, Aw112010,
## Hmha1, not searching for symbol synonyms
FeaturePlot(proc_scobj, features="LN_Trm1", pt.size=2.5, 
            cols=rev(brewer.pal(n=11, name="RdBu")))

TGFb module score.

proc_scobj <- AddModuleScore(object=proc_scobj, features=list(TGFb_genes),
                             ctrl=100, name="TGFb")
## Warning: The following features are not present in the object: Mcpt2, Vipr2,
## Gpr56, Cd33, Dlk1, Gsg2, Klhl30, 1810011H11Rik, Tfr2, Htra3, D8Ertd82e,
## Epb4.1l2, Fam101b, 9430020K01Rik, 1700049G17Rik, Sepn1, Prrt2, Zfp820,
## D3Ertd254e, Ccrn4l, Tmem57, Cd244, Tmem2, Fam46a, not searching for symbol
## synonyms
FeaturePlot(proc_scobj, features="TGFb1", pt.size=2.5, 
            cols=rev(brewer.pal(n=11, name="RdBu")))

Violin plots with the combined cluster object.

clusters_of_interest <- c("2", "6") # exclude cycling cells in 4
violin_colors <- c("#FFFFFF", "#7570B3")
VlnPlot(comb_scobj, features=c("Gzmb", "Gzmk", "Prf1"), 
        cols=violin_colors, idents=clusters_of_interest)

VlnPlot(comb_scobj, features=c("Csf1", "Ccl4", "Ccl5"), 
        cols=violin_colors, idents=clusters_of_interest)

VlnPlot(comb_scobj, features=c("Klf2", "S1pr1", "Tcf7"), 
        cols=violin_colors, idents=clusters_of_interest)

Feature plots that export individually.

feature_gene_list <- c("Ccr7", "Sell", "S1pr1", "Gzmb",
                       "Cxcr6", "Itgae", "Csf1")
for(i in 1:length(feature_gene_list)){
  print(FeaturePlot(proc_scobj, features=feature_gene_list[i], pt.size=2.5))
}

Heatmap of top 10 genes per cluster.

proc_scobj.markers <- FindAllMarkers(proc_scobj, only.pos=TRUE, min.pct=0.25,
                                     logfc.threshold=0.25)
## Calculating cluster 0
## Calculating cluster 1
## Calculating cluster 2
## Calculating cluster 3
## Calculating cluster 4
## Calculating cluster 5
proc_scobj.markers %>%
  group_by(cluster) %>%
  top_n(n=10, wt=avg_log2FC) -> proc_top10
DoHeatmap(proc_scobj, features=proc_top10$gene, 
          group.colors=custom_color_palette) + NoLegend()

Volcano plot of differentially expressed genes between Trm (cluster 2) and Other T (clusters 0, 1, 3, 5).

cluster2vT_vp.markers <- FindMarkers(proc_scobj, 
                                     ident.1=2, 
                                     ident.2=c(0, 1, 3, 5), 
                                     min.pct=0.25)
EnhancedVolcano(cluster2vT_vp.markers,
                lab=rownames(cluster2vT_vp.markers),
                x='avg_log2FC',
                y='p_val_adj',
                title="Trm vs Other T",
                pCutoff=0.01,
                FCcutoff=0.5,
                pointSize=3.0,
                labSize=5.0,
                selectLab=NULL)
## Warning: One or more p-values is 0. Converting to 10^-1 * current lowest
## non-zero p-value...